load("/data/nycdoe/school_info_with_lat_long.Rdata")
#loda bio data
load("/data/nycdoe/clean_data/common_data_frames/bio_data.Rdata")
load("/data/nycdoe/clean_data/common_data_frames/zoneData.Rdata")
boro_zip <- read.csv("/data/nycdoe/nyc_zip_to_borough.csv")
all_bio_data <- bio_data
zone_data <- zoneData
county <- c("005", "047", "061", "081", "085")
boro <- c("Bronx", "Brooklyn", "Manhattan", "Queens", "Staten")
county_bor <- data.frame(county, boro)
boro_zip_join <- full_join(boro_zip, county_bor, by = c("borough" = "boro"))
save(boro_zip_join, file = "/data/nycdoe/clean_data/boro_zip_join.Rdata")
load and select from zone data
zoneData <- zone_data %>% select(c(1,2,3,4,5,7,8))
nrow(zoneData) #9502347 rows in zoneData
## [1] 9502347
#zoneData %>% group_by(year) %>% filter(is.na(res_zip_cde)) #23891 rowa
bio_data <- bio_data %>% select(student_id_scram, year, dbn, grade_level)
join_zone_bio <- inner_join(bio_data, zoneData, by = c("student_id_scram", "year"))
join_zone_bio %>% filter(is.na(res_zip_cde) & is.na(census_tract))#21,780
## # A tibble: 21,780 x 9
## student_id_scram year dbn grade_level res_zip_cde zoned_elm_dbn
## <int> <dbl> <chr> <chr> <chr> <chr>
## 1 701002241 2007 08X405 09 <NA> <NA>
## 2 270002473 2007 13K336 06 <NA> <NA>
## 3 602002359 2007 31R044 04 <NA> <NA>
## 4 924102651 2007 11X178 02 <NA> <NA>
## 5 38102775 2007 08X069 01 <NA> <NA>
## 6 439202065 2007 02M047 07 <NA> <NA>
## 7 687202456 2007 19K328 05 <NA> <NA>
## 8 746202007 2007 21K281 06 <NA> <NA>
## 9 920202898 2007 75X723 04 <NA> <NA>
## 10 901202809 2007 17K353 06 <NA> <NA>
## # ... with 21,770 more rows, and 3 more variables: zoned_mid_dbn <chr>,
## # zoned_hs_dbn <chr>, census_tract <chr>
nrow(join_zone_bio) # 9465495 rows
## [1] 9465495
# with_grade <- all_bios %>% select(student_id_scram, year, dbn, grade_level)
# join_zone_bio_with_grade <- inner_join(with_grade, zoneData, by = c("student_id_scram", "year"))
# save(join_zone_bio_with_grade, file = "/data/nycdoe/clean_data/join_zone_bio_grade.Rdata")
student_go_to_schools <- join_zone_bio %>% select(c(1,2,3,4,5,9))
student_go_to_schools <- student_go_to_schools %>% mutate(res_zip_cde = as.integer(res_zip_cde))
## Warning in evalq(as.integer(res_zip_cde), <environment>): NAs introduced by
## coercion
#join to get county
student_go_to_schools_join_county <- full_join(student_go_to_schools, boro_zip_join, by = c("res_zip_cde" = "zipcode"))
student_go_to_schools_join_county <- student_go_to_schools_join_county %>% group_by(year, dbn, census_tract, county) %>% summarise(numStudents = n())
#filter na values
na_check <- student_go_to_schools_join_county %>% filter(is.na(census_tract) | is.na(county))
nrow(na_check)/nrow(student_go_to_schools_join_county)
## [1] 0.01311077
#dropping 0.01311077 (%) where either county or tract is na (county is na when there was no zip code)
student_go_to_schools_join_county <- student_go_to_schools_join_county %>% filter(!is.na(census_tract) | !is.na(county))
#coerce to int
student_go_to_schools_join_county <- student_go_to_schools_join_county %>% ungroup() %>% mutate(census_tract = as.integer(census_tract))
## Warning in evalq(as.integer(census_tract), <environment>): NAs introduced
## by coercion
#pad with 0's
student_go_to_schools_join_county <- student_go_to_schools_join_county %>% mutate(census_tract = sprintf("%06d", census_tract))
#put back to char
student_go_to_schools_join_county<- student_go_to_schools_join_county %>% mutate(census_tract = as.character(census_tract))
#Check what was dropped ???? Are zips listed in every year???? yes there are zips every year, a negligible amount were dropped (they had no data at all)
student_go_to_schools_join_county_join_latLong <- inner_join(student_go_to_schools_join_county, lat_long, by = "dbn")
student_go_to_schools_join_county_join_latLong_join_name <- inner_join(student_go_to_schools_join_county_join_latLong, school_info, by = c("dbn" = "ATS System Code"))
#load tract data
load('/data/nycdoe/nyc_tracts.Rdata')
#filter data to one year, one dbn
sample_map_data <- student_go_to_schools_join_county_join_latLong_join_name %>% filter(dbn =='02M475' & year ==2009)
#add the data to the sapcial file
tracts_map <- merge(nyc_tracts, sample_map_data, by.x = c("TRACTCE","COUNTYFP"), by.y =c("census_tract", "county"))
#design color palette
pal3 <- colorNumeric(palette = "Reds",
domain = range(tracts_map@data$numStudents, na.rm=T), na.color = "#cccccc")
#nice school icon
school_icon <- makeIcon(
iconUrl = "http://www.freeiconspng.com/uploads/high-school-icon-png-8.png",
iconWidth = 38, iconHeight = 38)
#############################################
#get schools zones shapefile (THIS IS ELEMENTARY SCHOOLS!!!!)
r<- GET("https://data.cityofnewyork.us/api/geospatial/cq6p-iwiy?method=export&format=GeoJSON")
dbns <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
## Warning in readOGR(content(r, "text"), "OGRGeoJSON", verbose = F): Dropping
## null geometries: 752
###############################################
#use leaflet to map data
leaflet(tracts_map) %>%
#addTiles() %>%
addPolygons(data = dbns, color = "black") %>%
addPolygons(weight = .5, fillColor = ~pal3(numStudents), popup = ~paste("Number of Students:", numStudents, "\nCensus Tract:", TRACTCE), fillOpacity = .7) %>%
addLegend(pal = pal3, values = ~numStudents, opacity = 1) %>%
addMarkers(~lon, ~lat, icon = school_icon) %>%
addProviderTiles("CartoDB.Positron") %>%
setView(-73.98, 40.75, zoom = 13)
## Warning in validateCoords(lng, lat, funcName): Data contains 1275 rows with
## either missing or invalid lat/lon values and will be ignored
Make a function to do this for you
#function take dbn and make map
getMapFromDbn <- function(mydbn, myyear){
sample_map_data <- student_go_to_schools_join_county %>% filter(dbn == mydbn & year == myyear)
tracts_map <- merge(nyc_tracts, sample_map_data, by.x = c("TRACTCE","COUNTYFP"), by.y =c("census_tract", "county"))
pal3 <- colorNumeric(palette = "Reds",
domain = range(tracts_map@data$numStudents, na.rm=T), na.color = "#cccccc")
leaflet(tracts_map) %>%
#addTiles() %>%
addPolygons(weight = .5, fillColor = ~pal3(numStudents), popup = ~paste("Number of Students:", numStudents, "\nCensus Tract:", TRACTCE), fillOpacity = .7) %>%
addLegend(pal = pal3, values = ~numStudents, opacity = 1) %>%
addProviderTiles("CartoDB.Positron") %>%
setView(-73.98, 40.75, zoom = 13)
}
getMapFromDbn("10X368", 2008)
save(student_go_to_schools_join_county, file = "/data/nycdoe/clean_data/students_go_to_school_join_county.Rdata")